Heatmap function
create_gene_heatmap <- function(de_results_df, dge_object, font_size = 12, num_genes = 20) {
# Subset the genes
top_genes <- de_results_df$Symbol[1:num_genes]
subset_matrix <- dge_object$logCPM[top_genes, colnames(dge_object$counts)]
# Create annotation dataframe
sample_info <- dge_object$samples
annotation_df <- data.frame(
Dose = sample_info$Dose
)
rownames(annotation_df) <- colnames(subset_matrix)
# Reorder gene expression matrix
ordered_samples <- order(sample_info$Dose)
subset_matrix <- subset_matrix[, ordered_samples]
sample_info <- sample_info[ordered_samples, ]
# Create the heatmap
heatmap <- pheatmap(subset_matrix,
scale = "row",
cluster_rows = FALSE,
cluster_cols = FALSE,
color = viridis(50),
show_colnames = FALSE,
annotation_col = annotation_df,
fontsize = font_size,
silent = FALSE)
}These are the first experiments where I use version 2 chemistry. Read 2 is now the barcode and UMI. Read 1 is the cDNA. This was done to avoid reading through the TSO on Ilumina read 1
This sequencing run consists of 3 experiments:
Test version 2 protocol with a biological application from Zac Moore of Brain Cancer lab https://au-mynotebook.labarchives.com/share/Piper_Research_Project/MzEuMjAwMDAwMDAwMDAwMDAzfDE3MTc2NS8yNC9UcmVlTm9kZS8zMzczNDM0NjE1fDc5LjE5OTk5OTk5OTk5OTk5
Differential expression testing. Focus on the timepoints that show
the most dynamic changes.
Keep the 10uM dose becuase in notebook 3B this caused moderate
effects.
This object was generated in 2A_Clustering.
Subset only 10uM dose for simplicity.
sce <- readRDS(here::here(
"data/TIRE_brain_human/SCEs/", "brainCancer_subset.sce.rds"
))
dge <- scran::convertTo(sce, type="edgeR")
dose <- c(0, 1.00e-05)
dge <- dge[,dge$samples$Dose_M %in% dose]
tb <- as_tibble(dge$samples)Convert the numerical dose column to a factor. Keep the 10uM dose becuase in notebook 3B this caused moderate effects
tb$Dose <- as.factor(tb$Dose_M)
tb$Dose <- recode(tb$Dose,
"0" = "DMSO",
"1e-05" = "10uM")
dge$samples$Dose <- tb$Dose
dge <- dge[,dge$samples$Dose %in% c("DMSO", "10uM")]
dge$samples$Dose <- droplevels(dge$samples$Dose)Create a new factor for time and dose.
This is simpler than an interaction term in the design matrix.
Have a look at the important metadata.
Remove genes that are lowly expressed. 11,000 genes are kept
## Mode FALSE TRUE
## logical 13030 11222
Correct for composition biases by computing normalization factors with the trimmed mean of M-values method.
Model matrix generation.
## Dose_Time10uM_d3 Dose_Time10uM_d5 Dose_Time10uM_d7 Dose_TimeDMSO_d3
## ACGCCTATCG 0 0 0 0
## ATAGAACCGC 0 0 0 0
## ATCAAGAACG 1 0 0 0
## ATCGCGTTGT 0 1 0 0
## CAATCACTGA 0 0 0 0
## CCAAGACGTG 1 0 0 0
## Dose_TimeDMSO_d5 Dose_TimeDMSO_d7
## ACGCCTATCG 0 1
## ATAGAACCGC 1 0
## ATCAAGAACG 0 0
## ATCGCGTTGT 0 0
## CAATCACTGA 1 0
## CCAAGACGTG 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01654 0.01741 0.01927 0.02051 0.02352 0.03156
Create the contrast matrix. Foucs on the largest changes.
contr.matrix <- makeContrasts(
day5_vs_day3 = Dose_Time10uM_d5 - Dose_Time10uM_d3,
day7_vs_day3 = Dose_Time10uM_d7 - Dose_Time10uM_d3,
day7_vs_day5 = Dose_Time10uM_d7 - Dose_Time10uM_d5,
levels = colnames(design))In each contrast, the format is A - B where:
So the baseline is always the earlier timepoint
Fit BCV
par(mfrow=c(1,2))
v <- voom(dge, design, plot=TRUE)
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit, main="Final model: Mean-variance trend")day5 <- topTable(efit, coef=1, n=length(efit$genes$ID), sort.by = "logFC")
results <- as_tibble(day5)
results$ID <- rownames(day5)
# add a column of NAs
results$DElabel <- "n/s"
# if log2Foldchange > 1 or < -1 and pvalue < 0.05, set as "UP"
results$DElabel[results$logFC > 1 & results$adj.P.Val < 0.1] <- "Day5"
results$DElabel[results$logFC < -1 & results$adj.P.Val < 0.1] <- "Day3"Add gene labels
results$genelabels <- ""
results$genelabels[results$Symbol == "CDCA5"] <- "CDCA5"
results$genelabels[results$Symbol == "PBX1"] <- "PBX1"
results$genelabels[results$Symbol == "LDLR"] <- "LDLR"
results$genelabels[results$Symbol == "EXO1"] <- "EXO1"
results$genelabels[results$Symbol == "SPON1"] <- "SPON1"
results$genelabels[results$Symbol == "ABCA5"] <- "ABCA5"
results$genelabels[results$Symbol == "MRPS6"] <- "MRPS6"
results$genelabels[results$Symbol == "NEAT1"] <- "NEAT1"results$DElabel <- factor(results$DElabel, levels = c("Day5", "n/s", "Day3")) # Adjust as per your actual labels
plt1 <- ggplot(data = results, aes(x = logFC, y = -log10(adj.P.Val), colour = DElabel, label = genelabels)) +
geom_point(alpha = 0.33, size = 1.5) +
geom_text_repel(size = 4, colour = "black") +
guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
scale_color_manual(
values = c("darkorange", "grey", "darkblue"), # Colors in the desired order
name = "Upregulated",
labels = c("Day5", "n/s", "Day3") # Optional: Add custom labels
) +
geom_vline(xintercept = 1, linetype = "dotted") +
geom_vline(xintercept = -1, linetype = "dotted") +
theme_Publication()
plt1Need to convert geneIDs from ensembl to enterez
geneids <- as.data.frame(v$genes$ID)
colnames(geneids) <- "ENSEMBL"
geneids$entrez <- mapIds(org.Hs.eg.db, keys = geneids$ENSEMBL, keytype = "ENSEMBL", column = "ENTREZID")The things that go down are the typical cell cycle arrest stuff. What goes up is more interesting in the ANGIOGENESIS and protein secretion.
load("data/MSigDB/human_H_v5p2.rdata")
idx <- ids2indices(Hs.H,identifiers = geneids$entrez)
cam.day5_3<- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.day5_3,10)Visualize the gene set testing.
par(mfrow=c(1,1))
barcodeplot(efit$t[,1], index=idx$HALLMARK_ANGIOGENESIS,
index2 = idx$HALLMARK_E2F_TARGETS)(top)HALLMARK_ANGIOGENESIS (bottom)HALLMARK_E2F_TARGETS
Visualize as a barplot
day7 <- topTable(efit, coef=2, n=length(efit$genes$ID), sort.by = "logFC")
results <- as_tibble(day7)
results$ID <- rownames(day7)
# add a column of NAs
results$DElabel <- "n/s"
# if log2Foldchange > 1 or < -1 and pvalue < 0.05, set as "UP"
results$DElabel[results$logFC > 1 & results$adj.P.Val < 0.1] <- "Day7"
results$DElabel[results$logFC < -1 & results$adj.P.Val < 0.1] <- "Day3"Add gene labels
results$genelabels <- ""
results$genelabels[results$Symbol == "PBX1"] <- "PBX1"
results$genelabels[results$Symbol == "RPH3A"] <- "RPH3A"
results$genelabels[results$Symbol == "KIAA0391"] <- "KIAA0391"
results$genelabels[results$Symbol == "CENPP"] <- "CENPP"
results$genelabels[results$Symbol == "CHTF18"] <- "CHTF18"
results$genelabels[results$Symbol == "FOSL2"] <- "FOSL2"results$DElabel <- factor(results$DElabel, levels = c("Day7", "n/s", "Day3")) # Adjust as per your actual labels
plt2 <- ggplot(data = results, aes(x = logFC, y = -log10(adj.P.Val), colour = DElabel, label = genelabels)) +
geom_point(alpha = 0.33, size = 1.5) +
geom_text_repel(size = 4, colour = "black") +
guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
scale_color_manual(
values = c("darkorange", "grey", "darkblue"), # Colors in the desired order
name = "Upregulated",
labels = c("Day7", "n/s", "Day3") # Optional: Add custom labels
) +
geom_vline(xintercept = 1, linetype = "dotted") +
geom_vline(xintercept = -1, linetype = "dotted") +
theme_Publication()
plt2The things that go down are the typical cell cycle arrest stuff. What goes up is more interesting in the ANGIOGENESIS and hedgehog signalling.
Visualize the gene set testing.
par(mfrow=c(1,1))
barcodeplot(efit$t[,1], index=idx$HALLMARK_HEDGEHOG_SIGNALING,
index2 = idx$HALLMARK_E2F_TARGETS)(top)HALLMARK_HEDGEHOG_SIGNALING (bottom)HALLMARK_E2F_TARGETS
Visualize as a barplot
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Red Hat Enterprise Linux 9.5 (Plow)
##
## Matrix products: default
## BLAS: /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRblas.so
## LAPACK: /stornext/System/data/software/rhel/9/base/tools/R/4.4.1/lib64/R/lib/libRlapack.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggthemes_5.1.0 here_1.0.1
## [3] patchwork_1.3.0 viridis_0.6.5
## [5] viridisLite_0.4.2 pheatmap_1.0.12
## [7] ggrepel_0.9.6 scater_1.32.1
## [9] org.Hs.eg.db_3.19.1 AnnotationDbi_1.66.0
## [11] edgeR_4.2.2 limma_3.60.6
## [13] biomaRt_2.60.1 scuttle_1.14.0
## [15] lubridate_1.9.3 forcats_1.0.0
## [17] stringr_1.5.1 dplyr_1.1.4
## [19] purrr_1.0.2 readr_2.1.5
## [21] tidyr_1.3.1 tibble_3.2.1
## [23] ggplot2_3.5.1 tidyverse_2.0.0
## [25] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [27] Biobase_2.64.0 GenomicRanges_1.56.2
## [29] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [31] S4Vectors_0.42.1 BiocGenerics_0.50.0
## [33] MatrixGenerics_1.16.0 matrixStats_1.4.1
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.0
## [3] jsonlite_1.8.9 magrittr_2.0.3
## [5] ggbeeswarm_0.7.2 farver_2.1.2
## [7] rmarkdown_2.28 zlibbioc_1.50.0
## [9] vctrs_0.6.5 memoise_2.0.1
## [11] DelayedMatrixStats_1.26.0 htmltools_0.5.8.1
## [13] S4Arrays_1.4.1 progress_1.2.3
## [15] curl_5.2.3 BiocNeighbors_1.22.0
## [17] SparseArray_1.4.8 sass_0.4.9
## [19] bslib_0.8.0 httr2_1.0.5
## [21] cachem_1.1.0 igraph_2.0.3
## [23] lifecycle_1.0.4 pkgconfig_2.0.3
## [25] rsvd_1.0.5 Matrix_1.7-0
## [27] R6_2.5.1 fastmap_1.2.0
## [29] GenomeInfoDbData_1.2.12 digest_0.6.37
## [31] colorspace_2.1-1 rprojroot_2.0.4
## [33] dqrng_0.4.1 irlba_2.3.5.1
## [35] RSQLite_2.3.7 beachmat_2.20.0
## [37] labeling_0.4.3 filelock_1.0.3
## [39] fansi_1.0.6 timechange_0.3.0
## [41] httr_1.4.7 abind_1.4-8
## [43] compiler_4.4.1 bit64_4.5.2
## [45] withr_3.0.1 BiocParallel_1.38.0
## [47] DBI_1.2.3 highr_0.11
## [49] rappdirs_0.3.3 DelayedArray_0.30.1
## [51] bluster_1.14.0 tools_4.4.1
## [53] vipor_0.4.7 beeswarm_0.4.0
## [55] glue_1.8.0 cluster_2.1.6
## [57] generics_0.1.3 gtable_0.3.5
## [59] tzdb_0.4.0 hms_1.1.3
## [61] metapod_1.12.0 BiocSingular_1.20.0
## [63] ScaledMatrix_1.12.0 xml2_1.3.6
## [65] utf8_1.2.4 XVector_0.44.0
## [67] pillar_1.9.0 splines_4.4.1
## [69] BiocFileCache_2.12.0 lattice_0.22-6
## [71] bit_4.5.0 tidyselect_1.2.1
## [73] locfit_1.5-9.10 Biostrings_2.72.1
## [75] knitr_1.48 gridExtra_2.3
## [77] xfun_0.48 statmod_1.5.0
## [79] stringi_1.8.4 UCSC.utils_1.0.0
## [81] yaml_2.3.10 evaluate_1.0.1
## [83] codetools_0.2-20 cli_3.6.3
## [85] munsell_0.5.1 jquerylib_0.1.4
## [87] Rcpp_1.0.13 dbplyr_2.5.0
## [89] png_0.1-8 parallel_4.4.1
## [91] blob_1.2.4 prettyunits_1.2.0
## [93] scran_1.32.0 sparseMatrixStats_1.16.0
## [95] scales_1.3.0 crayon_1.5.3
## [97] rlang_1.1.4 KEGGREST_1.44.1